In [1]:
#Warning ignorance if generated

import warnings
warnings.filterwarnings("ignore")
In [2]:
#import necessary python packages for single-cell RNA SEQ analysis

import scanpy as sc #software suite of tools for single-cell analysis in python
import besca as bc #internal BEDA package for single cell analysis
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
import anndata as ad
from scipy.sparse import csr_matrix
import scanpy.external as sce
from harmony import harmonize
import umap.umap_ as umap
from scipy import io
print(ad.__version__)

sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)

# gives error!! sc.logging.print_versions()
INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmpf68urjdb
INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmpf68urjdb/_remote_module_non_scriptable.py
INFO:lightning_fabric.utilities.seed:Global seed set to 0
0.9.1
In [3]:
#Reading last saved annoatated data object written in h5ad data format. 
#We used similar adata variable to make similar previous data analysis 

save_file = '/home/jana/scanpy_qc_filtered_pbmcs_for_sarcoid.h5ad'
adata=sc.read_h5ad(save_file)
In [4]:
#Finding Marker genes

#Logarithmize the data

sc.pp.log1p(adata)

#Finding marker genes using Wilcoxon rank-sum method

sc.tl.rank_genes_groups(adata, 'leiden_0.7', method='wilcoxon')

#Showing Top 25 Genes scorer of each clusters

sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
WARNING: adata.X seems to be already log-transformed.
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:05:05)
In [5]:
# Displaying a table of Top 5 highly differentialy highly scorer expressed genesfor each clusters in Leiden clustering with 0.7 resolution


pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(5)
Out[5]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
0 VCAN RPL30 CD79A IL7R RPS12 S100A8 CPVL CCL5 NKG7 LST1 TNFSF10 CST3 KLRB1 IL32 ITM2C KCTD12 PPBP SUB1 RPS4X TNFRSF18
1 LYZ RPS3A MS4A1 LTB RPS3A S100A9 FTH1 NKG7 GNLY FCGR3A MX1 HLA-DRB1 GZMK TRAC CCDC50 FOS PF4 STMN1 SNHG29 KLRB1
2 S100A9 RPL32 CD79B IL32 RPS6 MNDA FGL2 IL32 CST7 AIF1 PARP14 HLA-DPA1 KLRG1 RTKN2 LILRA4 NEAT1 NRGN C12orf75 NPM1 CTSW
3 S100A8 RPL21 CD37 TRAC CD8B S100A12 HLA-DPA1 CST7 GZMA SMIM25 IFI44L HLA-DQA1 IL32 CD3D IRF8 DUSP6 GP1BB PCLAF RPS6 AC004687.1
4 CD14 RPS15A CD74 TPT1 RPL32 VCAN AIF1 B2M PRF1 FCER1G IFIT3 HLA-DPB1 GZMA LTB PLD4 TNFAIP2 CAVIN2 PPIA GAS5 CD7
In [6]:
#Dotplot top five genes of each clusters inside adata with minimum log fold change =2 
sc.pl.rank_genes_groups_dotplot(adata, n_genes=5, values_to_plot='logfoldchanges', min_logfoldchange=2, vmax=7, vmin=-7, cmap='bwr')
WARNING: dendrogram data not found (using key=dendrogram_leiden_0.7). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
    using 'X_pca' with n_pcs = 50
Storing dendrogram info using `.uns['dendrogram_leiden_0.7']`
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored

In [7]:
#Matrix plot top five scorer genes of each clusters inside adata 

sc.pl.rank_genes_groups_matrixplot(adata, n_genes=5, use_raw=False, vmin=-3, vmax=3, cmap='bwr')
In [8]:
# Unique Genes list of 100 top high scores genes of all clusters present inside the adata

marker_gene_unique =['B2M', 'C12orf75', 'CAVIN2',
'CCDC50', 'CCL5', 'CD14', 'CD37', 'CD3D',
'CD7', 'CD74', 'CD79A', 'CD79B', 'CD8B',
'CPVL', 'CST3', 'CST7', 'CTSW', 'DUSP6', 
'FCER1G', 'FCGR3A','FGL2', 'FOS', 'FTH1', 
'GAS5', 'GNLY', 'GP1BB', 'GZMA', 'GZMK',
'HLA-DPA1', 'HLA-DPB1', 'HLA-DQA1', 'HLA-DRB1',
'IFI44L', 'IFIT3', 'IL32', 'IL7R', 'IRF8',
'ITM2C', 'KCTD12', 'KLRB1', 'KLRG1', 'LILRA4',
'LST1', 'LTB', 'LYZ', 'MNDA', 'MS4A1', 
'MX1', 'NEAT1', 'NKG7', 'NPM1', 'NRGN', 
'PARP14', 'PCLAF', 'PF4', 'PLD4', 'PPBP',
'PPIA', 'PRF1', 'RPL21', 'RPL30', 'RPL32',
'RPS12', 'RPS15A', 'RPS3A', 'RPS4X', 'RPS6', 
'RTKN2', 'S100A12', 'S100A8', 'S100A9', 'SMIM25', 
'SNHG29', 'STMN1', 'SUB1', 'TNFAIP2', 'TNFRSF18', 
'TNFSF10', 'TPT1', 'TRAC', 'VCAN']
In [9]:
#Dotplot of unique markers genes 
sc.pl.dotplot(adata, marker_gene_unique, groupby='leiden_0.7', dendrogram=True)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored

In [10]:
#B-cell's known markers genes: CD79A, MS4A1

sc.settings.set_figure_params(dpi=100)
sc.pl.violin(adata, ['CD79A', 'MS4A1'], groupby='leiden_0.7', figsize=(3,1), gridspec_kw={'wspace':0.8}, rotation=90, alpha=0.8)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning:

`set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`

INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [11]:
# CD4 T-cell known markers genes: CD3D, IL7R


sc.settings.set_figure_params(dpi=100)
sc.pl.violin(adata, ['CD3D','IL7R'], groupby='leiden_0.7', figsize=(3,1), gridspec_kw={'wspace':0.8}, rotation=90, alpha=0.8)
#sc.pl.violin(adata, ['CD8A'], groupby='leiden_0.7', figsize=(3,1), gridspec_kw={'wspace':0.8}, rotation=90, alpha=0.8)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning:

`set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`

INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [12]:
#CD8 T cells known markers genes: CD8A

sc.pl.violin(adata, ['CD8A'], groupby='leiden_0.7', figsize=(3,1), gridspec_kw={'wspace':0.8}, rotation=90, alpha=0.8)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [13]:
#CD14+ Monocytes known markers genes: CD14, LYZ
sc.settings.set_figure_params(dpi=100)
sc.pl.violin(adata, ['CD14','LYZ'], groupby='leiden_0.7', figsize=(3,1), gridspec_kw={'wspace':0.8}, rotation=90, alpha=0.8)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning:

`set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`

INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [14]:
# FCGR3A+ Monocytes known markers genes:FCGR3A, MS4A7
    
sc.settings.set_figure_params(dpi=100)
sc.pl.violin(adata, ['FCGR3A', 'MS4A7'], groupby='leiden_0.7', figsize=(3,1), gridspec_kw={'wspace':0.8}, rotation=90, alpha=0.8)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning:

`set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`

INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [15]:
#NK cells known marker genes: GNLY, NKG7

sc.settings.set_figure_params(dpi=100)
sc.pl.violin(adata, ['GNLY', 'NKG7'], groupby='leiden_0.7', figsize=(3,1), gridspec_kw={'wspace':0.8}, rotation=90, alpha=0.8)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning:

`set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`

INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [16]:
# Dendritic Cells known marker genes: FCER1A, CST3

sc.settings.set_figure_params(dpi=100)
sc.pl.violin(adata, ['FCER1A', 'CST3'], groupby='leiden_0.7', figsize=(3,1), gridspec_kw={'wspace':0.8}, rotation=90, alpha=0.8)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning:

`set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`

INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [17]:
#Megakaryocytes cell marker gene: PPBP
sc.pl.violin(adata, ['PPBP'], groupby='leiden_0.7', figsize=(3,1), gridspec_kw={'wspace':0.8}, rotation=90, alpha=0.8)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [18]:
#Popular cell markers in a list
popular_marker_list=['CD79A', 'MS4A1', 'CD3D','IL7R', 'CD8A', 'CD14','LYZ', 'FCGR3A', 'MS4A7', 
                     'GNLY', 'NKG7', 'FCER1A', 'CST3', 'PPBP']
                     
In [19]:
#Dotplot of popular marker list in leiden clusters

sc.pl.dotplot(adata, popular_marker_list, groupby='leiden_0.7', dendrogram=True)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored

In [20]:
# After Violin plot seeking the expression changes different markers genes, 
#trying to make a temporary table for manual annotations

from tabulate import tabulate

table = [
         ['Cluster no', 'Markers','Type of Cell'], 
         ['0', 'CD14, LYZ','CD14+ Monocytes'], 
         ['1', 'CD3D, IL7R','CD4 T-cell'], 
         ['2', 'CD79A, MS4A1','B-cell'],
         ['3', 'CD3D, IL7R','CD4 T-cell'], 
         ['4', 'CD3D, IL7R, CD8A','CD4 T-cell and CD8 T cell'], 
         ['5', 'CD14, LYZ','CD14+ Monocytes'],
         ['6', 'CD14, LYZ','CD14+ Monocytes'], 
         ['7', 'CD3D, IL7R, CD8A, GNLY, NKG7' ,'CD4 T-cell, CD8 T cell and NK cells'], 
         ['8', 'FCGR3A, MS4A7','FCGR3A+ Monocytes'],
         ['9', 'FCGR3A, MS4A7','FCGR3A+ Monocytes'],
         ['10', 'CD14, LYZ','CD14+ Monocytes'],
         ['11', 'FCER1A, CST3','Dendritic Cells'],
         ['12', 'CD3D, IL7R, GNLY, NKG7','CD4 T-cell and NK cells'],
         ['13', 'CD3D, IL7R','CD4 T-cell'],
         ['14', 'FCER1A, CST3','Dendritic Cells'],
         ['15', 'CD14, LYZ','CD14+ Monocytes'],
         ['16', 'PPBP','Megakaryocytes cells'],
         ['17', 'CD3D, IL7R, GNLY, NKG7','CD4 T-cell and NK cells'],
         ['18', 'CD3D, IL7R, CD8A','CD4 T-cell and CD8 T cell'],
         ['19', 'CD14,LYZ, GNLY, NKG7','CD14+ Monocytes and NK cells'],
        
        
        
        
        ]

print(tabulate(table))
----------  ----------------------------  -----------------------------------
Cluster no  Markers                       Type of Cell
0           CD14, LYZ                     CD14+ Monocytes
1           CD3D, IL7R                    CD4 T-cell
2           CD79A, MS4A1                  B-cell
3           CD3D, IL7R                    CD4 T-cell
4           CD3D, IL7R, CD8A              CD4 T-cell and CD8 T cell
5           CD14, LYZ                     CD14+ Monocytes
6           CD14, LYZ                     CD14+ Monocytes
7           CD3D, IL7R, CD8A, GNLY, NKG7  CD4 T-cell, CD8 T cell and NK cells
8           FCGR3A, MS4A7                 FCGR3A+ Monocytes
9           FCGR3A, MS4A7                 FCGR3A+ Monocytes
10          CD14, LYZ                     CD14+ Monocytes
11          FCER1A, CST3                  Dendritic Cells
12          CD3D, IL7R, GNLY, NKG7        CD4 T-cell and NK cells
13          CD3D, IL7R                    CD4 T-cell
14          FCER1A, CST3                  Dendritic Cells
15          CD14, LYZ                     CD14+ Monocytes
16          PPBP                          Megakaryocytes cells
17          CD3D, IL7R, GNLY, NKG7        CD4 T-cell and NK cells
18          CD3D, IL7R, CD8A              CD4 T-cell and CD8 T cell
19          CD14,LYZ, GNLY, NKG7          CD14+ Monocytes and NK cells
----------  ----------------------------  -----------------------------------
In [21]:
#AZIMUTH MARKERS GENES LEVEL 2 as in a DICT

azimuth_default_markers = {

'B_interm': ['MS4A1', 'TNFRSF13B', 'IGHM', 'IGHD', 'AIM2', 'CD79A', 'LINC01857', 'RALGPS2', 'BANK1', 'CD79B'],
'B_memory': ['MS4A1','COCH', 'AIM2', 'BANK1', 'SSPN', 'CD79A', 'TEX9', 'RALGPS2', 'TNFRSF13C', 'LINC01781'],
'B_naive':['IGHM', 'IGHD', 'CD79A', 'IL4R', 'MS4A1', 'CXCR4', 'BTG1', 'TCL1A', 'CD79B', 'YBX3'],
'Plasmablast': ['IGHA2', 'MZB1', 'TNFRSF17', 'DERL3', 'TXNDC5', 'TNFRSF13B', 'POU2AF1', 'CPNE5', 'NT5DC2'],
'CD4_CTL': ['GZMH', 'CD4', 'FGFBP2', 'ITGB1', 'GZMA', 'CST7', 'GNLY', 'B2M', 'IL32', 'NKG7'],
'CD4_Naive': ['TCF7', 'CD4', 'CCR7', 'IL7R', 'FHIT', 'LEF1', 'MAL', 'NOSIP', 'LDHB', 'PIK3IP1'],
'CD4_Prolif': ['MKI67', 'TOP2A', 'PCLAF', 'CENPF', 'TYMS', 'NUSAP1', 'ASPM', 'PTTG1', 'TPX2', 'RRM2'],
'CD4_TCM': ['IL7R', 'TMSB10', 'CD4', 'ITGB1', 'LTB', 'TRAC', 'AQP3', 'LDHB', 'IL32', 'MAL'],
'CD4_TEM': ['IL7R', 'CCL5', 'FYB1', 'GZMK', 'IL32', 'GZMA', 'KLRB1', 'TRAC', 'LTB', 'AQP3'],
'Treg':['RTKN2', 'FOXP3', 'AC133644.2', 'CD4', 'IL2RA', 'TIGIT', 'CTLA4', 'FCRL3', 'LAIR2', 'IKZF2'],
'CD8_Naive' : ['CD8B', 'S100B', 'CCR7', 'RGS10', 'NOSIP', 'LINC02446', 'LEF1', 'CRTAM', 'CD8A', 'OXNAD1'],
'CD8_Prolif' : ['MKI67', 'CD8B', 'TYMS', 'TRAC', 'PCLAF', 'CD3D', 'CLSPN', 'CD3G', 'TK1', 'RRM2'],
'CD8_TCM':	['CD8B', 'ANXA1', 'CD8A', 'KRT1', 'LINC02446', 'YBX3', 'IL7R', 'TRAC', 'NELL2', 'LDHB'],
'CD8_TEM': ['CCL5', 'GZMH', 'CD8A', 'TRAC', 'KLRD1', 'NKG7', 'GZMK', 'CST7', 'CD8B', 'TRGC2'],
'ASDC':	['PPP1R14A', 'LILRA4', 'AXL', 'IL3RA', 'SCT', 'SCN9A', 'LGMN', 'DNASE1L3', 'CLEC4C', 'GAS6'],
'cDC1':	['CLEC9A', 'DNASE1L3', 'C1orf54', 'IDO1', 'CLNK', 'CADM1', 'FLT3', 'ENPP1', 'XCR1', 'NDRG2'],
'cDC2':['FCER1A', 'HLA-DQA1', 'CLEC10A', 'CD1C', 'ENHO','PLD4', 'GSN', 'SLC38A1', 'NDRG2', 'AFF3'],
'pDC': ['ITM2C', 'PLD4', 'SERPINF1', 'LILRA4', 'IL3RA', 'TPM2', 'MZB1', 'SPIB', 'IRF4', 'SMPD3'],
'CD14_Mono': ['S100A9', 'CTSS', 'S100A8', 'LYZ', 'VCAN', 'S100A12', 'IL1B', 'CD14', 'G0S2', 'FCN1'],
'CD16_Mono': ['CDKN1C', 'FCGR3A', 'PTPRC', 'LST1', 'IER5', 'MS4A7', 'RHOC', 'IFITM3', 'AIF1', 'HES4'],
'NK': ['GNLY', 'TYROBP', 'NKG7', 'FCER1G', 'GZMB', 'TRDC', 'PRF1', 'FGFBP2', 'SPON2', 'KLRF1'],
'NK_Prolif': ['MKI67', 'KLRF1', 'TYMS', 'TRDC', 'TOP2A', 'FCER1G', 'PCLAF', 'CD247', 'CLSPN', 'ASPM'],
'NK_CD56br': ['XCL2', 'FCER1G', 'SPINK2', 'TRDC', 'KLRC1', 'XCL1', 'SPTSSB', 'PPP1R9A', 'NCAM1', 'TNFRSF11A'],
'Eryth': ['AHSP', 'ALAS2', 'CA1', 'SLC4A1', 'IFIT1B', 'TRIM58', 'SELENBP1', 'TMCC2'],
'HSPC':	['SPINK2', 'PRSS57', 'CYTL1', 'EGFL7', 'GATA2', 'CD34', 'SMIM24', 'AVP', 'MYB', 'LAPTM4B'],
'ILC': ['KIT', 'TRDC', 'TTLL10', 'LINC01229', 'SOX4', 'KLRB1', 'TNFRSF18', 'TNFRSF4', 'IL1R1', 'HPGDS'],
'Platelet': ['PPBP', 'PF4', 'NRGN', 'GNG11', 'CAVIN2', 'TUBB1', 'CLU', 'HIST1H2AC', 'RGS18', 'GP9'],
'dnT': ['PTPN3', 'MIR4422HG', 'NUCB2', 'CAV1', 'DTHD1', 'GZMA', 'MYB', 'FXYD2', 'GZMK', 'AC004585.1'],
'gdT':['TRDC', 'TRGC1', 'TRGC2', 'KLRC1', 'NKG7', 'TRDV2', 'CD7', 'TRGV9', 'KLRD1', 'KLRG1'],
'MAIT':	['KLRB1', 'NKG7', 'GZMK', 'IL7R', 'SLC4A10', 'GZMA', 'CXCR6', 'PRSS35', 'RBM24', 'NCR3']

}
In [22]:
#AZIMUTH MARKERS GENES another representation

B_interm = ['MS4A1', 'TNFRSF13B', 'IGHM', 'IGHD', 'AIM2', 'CD79A', 'LINC01857', 'RALGPS2', 'BANK1', 'CD79B']
B_memory = ['MS4A1','COCH', 'AIM2', 'BANK1', 'SSPN', 'CD79A', 'TEX9', 'RALGPS2', 'TNFRSF13C', 'LINC01781']
B_naive = ['IGHM', 'IGHD', 'CD79A', 'IL4R', 'MS4A1', 'CXCR4', 'BTG1', 'TCL1A', 'CD79B', 'YBX3']
Plasmablast = ['IGHA2', 'MZB1', 'TNFRSF17', 'DERL3', 'TXNDC5', 'TNFRSF13B', 'POU2AF1', 'CPNE5', 'HNT5DC2']
CD4_CTL = ['GZMH', 'CD4', 'FGFBP2', 'ITGB1', 'GZMA', 'CST7', 'GNLY', 'B2M', 'IL32', 'NKG7']
CD4_Naive = ['TCF7', 'CD4', 'CCR7', 'IL7R', 'FHIT', 'LEF1', 'MAL', 'NOSIP', 'LDHB', 'PIK3IP1']
CD4_Prolif = ['MKI67', 'TOP2A', 'PCLAF', 'CENPF', 'TYMS', 'NUSAP1', 'ASPM', 'PTTG1', 'TPX2', 'RRM2']
CD4_TCM = ['IL7R', 'TMSB10', 'CD4', 'ITGB1', 'LTB', 'TRAC', 'AQP3', 'LDHB', 'IL32', 'MAL']
CD4_TEM  =['IL7R', 'CCL5', 'FYB1', 'GZMK', 'IL32', 'GZMA', 'KLRB1', 'TRAC', 'LTB', 'AQP3']
Treg = ['RTKN2', 'FOXP3', 'AC133644.2', 'CD4', 'IL2RA', 'TIGIT', 'CTLA4', 'FCRL3', 'LAIR2', 'IKZF2']
CD8_Naive =['CD8B', 'S100B', 'CCR7', 'RGS10', 'NOSIP', 'LINC02446', 'LEF1', 'CRTAM', 'CD8A', 'OXNAD1']
CD8_Prolif =['MKI67', 'CD8B', 'TYMS', 'TRAC', 'PCLAF', 'CD3D', 'CLSPN', 'CD3G', 'TK1', 'RRM2']
CD8_TCM =['CD8B', 'ANXA1', 'CD8A', 'KRT1', 'LINC02446', 'YBX3', 'IL7R', 'TRAC', 'NELL2', 'LDHB']
CD8_TEM	 = ['CCL5', 'GZMH', 'CD8A', 'TRAC', 'KLRD1', 'NKG7', 'GZMK', 'CST7', 'CD8B', 'TRGC2']
ASDC =['PPP1R14A', 'LILRA4', 'AXL', 'IL3RA', 'SCT', 'SCN9A', 'LGMN', 'DNASE1L3', 'CLEC4C', 'GAS6']
cDC1 =['CLEC9A', 'DNASE1L3', 'C1orf54', 'IDO1', 'CLNK', 'CADM1', 'FLT3', 'ENPP1', 'XCR1', 'NDRG2']
cDC2 =['FCER1A', 'HLA-DQA1', 'CLEC10A', 'CD1C', 'ENHO','PLD4', 'GSN', 'SLC38A1', 'NDRG2', 'AFF3']
pDC =['ITM2C', 'PLD4', 'SERPINF1', 'LILRA4', 'IL3RA', 'TPM2', 'MZB1', 'SPIB', 'IRF4', 'SMPD3']
CD14_Mono = ['S100A9', 'CTSS', 'S100A8', 'LYZ', 'VCAN', 'S100A12', 'IL1B', 'CD14', 'G0S2', 'FCN1']
CD16_Mono = ['CDKN1C', 'FCGR3A', 'PTPRC', 'LST1', 'IER5', 'MS4A7', 'RHOC', 'IFITM3', 'AIF1', 'HES4']
NK = ['GNLY', 'TYROBP', 'NKG7', 'FCER1G', 'GZMB', 'TRDC', 'PRF1', 'FGFBP2', 'SPON2', 'KLRF1']
NK_Prolif = ['MKI67', 'KLRF1', 'TYMS', 'TRDC', 'TOP2A', 'FCER1G', 'PCLAF', 'CD247', 'CLSPN', 'ASPM']
NK_CD56br = ['XCL2', 'FCER1G', 'SPINK2', 'TRDC', 'KLRC1', 'XCL1', 'SPTSSB', 'PPP1R9A', 'NCAM1', 'TNFRSF11A']
Eryth =['HBD', 'HBM', 'AHSP', 'ALAS2', 'CA1', 'SLC4A1', 'IFIT1B', 'TRIM58', 'SELENBP1', 'TMCC2']
HSPC =['SPINK2', 'PRSS57', 'CYTL1', 'EGFL7', 'GATA2', 'CD34', 'SMIM24', 'AVP', 'MYB', 'LAPTM4B']
ILC =['KIT', 'TRDC', 'TTLL10', 'LINC01229', 'SOX4', 'KLRB1', 'TNFRSF18', 'TNFRSF4', 'IL1R1', 'HPGDS']
Platelet =['PPBP', 'PF4', 'NRGN', 'GNG11', 'CAVIN2', 'TUBB1', 'CLU', 'HIST1H2AC', 'RGS18', 'GP9']
dnT =['PTPN3', 'MIR4422HG', 'NUCB2', 'CAV1', 'DTHD1', 'GZMA', 'MYB', 'FXYD2', 'GZMK', 'AC004585.1']
gdT =['TRDC', 'TRGC1', 'TRGC2', 'KLRC1', 'NKG7', 'TRDV2', 'CD7', 'TRGV9', 'KLRD1', 'KLRG1']
MAIT =['KLRB1', 'NKG7', 'GZMK', 'IL7R', 'SLC4A10', 'GZMA', 'CXCR6', 'PRSS35', 'RBM24', 'NCR3']
In [23]:
#azimuth markers DOTplot1
sc.settings.set_figure_params(dpi=70)
sc.pl.dotplot(adata, azimuth_default_markers, groupby='leiden_0.7', dendrogram=True)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning:

`set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`

WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different.
categories: 0, 1, 2, etc.
var_group_labels: B_interm, B_memory, B_naive, etc.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored

In [24]:
#initial_annotation 

new_cluster_names = ['CD14+ Mono', #0
                     'Naive CD4T', #1
                     'B cell', #2
                     'CD4 T', #3
                     'Naive CD8', #4
                     'CD14+ Mono', #5
                     'CD14+ Mono', #6
                     'Mixed', #7
                     'NK', #8
                     'CD16 Mono', #9
                     'CD14+ Mono', #10
                     'DC', #11
                     'CD8M', #12
                     'Tregs', #13
                     'DC', #14
                     'Monocytes', #15
                     'Platelets', #16
                     'Mixed', #17
                    'CD8T', #18
                    'Mixed NK'] #19

bc.tl.annotate_cells_clustering(adata=adata, clustering_label='leiden_0.7', new_annotation_label='initial_annotation', new_cluster_labels=new_cluster_names)
In [25]:
#Original clusters made from Leiden 0.7 resolution and Initial annotations of that clusters

sc.pl.umap(adata, color = ['leiden_0.7', 'initial_annotation'], wspace = 0.1, legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [26]:
#write_the result

#import scipy io package
from scipy import io

save_file = '/home/jana/scanpy_qc_filtered_pbmcs_for_sarcoid.h5ad'
adata.write_h5ad(save_file)
In [ ]: